## creates figures for main results section
## figure 2, figure 3, table 2, table 3

rm(list=ls())

library(tidyverse)
library(kableExtra)

# read in bootstrapped opinion data

opinion_bootstrapped <- readRDS('bootstrap_gaps_data/opinion_bootstrapped_noleaners.RDS')

for (i in names(opinion_bootstrapped)) {
  assign(i, opinion_bootstrapped[[i]])
}

# set up matrices/vectors of polarization data
# to feed into plots/figures

dem_polarization_boot <- dem_top_boot - dem_bottom_boot
rep_polarization_boot <- rep_top_boot - rep_bottom_boot
all_polarization_boot <- all_top_boot - all_bottom_boot

dem_issue_polarization <- abs(apply(dem_polarization_boot, MARGIN = 1, median))
rep_issue_polarization <- abs(apply(rep_polarization_boot, MARGIN = 1, median))
all_issue_polarization <- abs(apply(all_polarization_boot, MARGIN = 1, median))

dem_avg_polarization <- apply(abs(dem_polarization_boot), MARGIN = 2, mean)
rep_avg_polarization <- apply(abs(rep_polarization_boot), MARGIN = 2, mean)
all_avg_polarization <- apply(abs(all_polarization_boot), MARGIN = 2, mean)

dem_med_polarization <- apply(abs(dem_polarization_boot), MARGIN = 2, median)
rep_med_polarization <- apply(abs(rep_polarization_boot), MARGIN = 2, median)
all_med_polarization <- apply(abs(all_polarization_boot), MARGIN = 2, median)


# figure 2 - histogram of polarization

polhist.df <- as.data.frame(cbind(dem_issue_polarization, rep_issue_polarization, all_issue_polarization)) %>% 
  rownames_to_column('question') %>% 
  gather(key = party, value = opinion, -question) %>% 
  mutate(party = case_when(party == 'rep_issue_polarization' ~ 'Republican',
                           party == 'dem_issue_polarization' ~ 'Democrat',
                           party == 'all_issue_polarization' ~ 'All')) %>% 
  mutate(party = factor(party, levels = c('All', 'Democrat', 'Republican')))

polvline.df <- data.frame(party = c('All', 'Democrat', 'Republican'),
                          mean = c(median(all_avg_polarization),
                                   median(dem_avg_polarization),
                                   median(rep_avg_polarization)),
                          median = c(median(all_med_polarization),
                                     median(dem_med_polarization),
                                     median(rep_med_polarization))) %>% 
  gather(key = 'stat', value = 'average', mean, median)

ggplot(polhist.df,
       aes(x = opinion)) +
  geom_histogram(aes(fill = party), color = 'black', binwidth = 0.03, alpha = 0.5) +
  geom_vline(data = polvline.df,
             aes(xintercept = average, color = party, linetype = stat), linewidth = 0.75) +
  facet_grid(party ~ .) +
  scale_fill_manual(values = c('darkgray', 'blue', 'red', 'purple')) +
  scale_color_manual(values = c('black', 'darkblue', 'darkred', 'purple3')) +
  labs(y = '',
       x = 'Opinion Gap for Issue, Difference between Top and Bottom Deciles') +
  theme_bw() +
  theme(legend.position = 'none')
ggsave('figures/boot_polarization_hist_by_party.png', width = 8, height = 5.5)



# table 2 - opinion gaps by domain and party

polarization_tab <- data.frame(expand.grid(party = c('rep', 'dem', 'all'),
                                           topic_6 = unique(issuetopics$topic_6),
                                           stringsAsFactors = FALSE),
                               avgpol = NA,
                               signif = NA,
                               gilens = NA,
                               gilenssignif = NA,
                               disagree = NA,
                               disagreegilens = NA,
                               n_quest = NA,
                               stringsAsFactors = FALSE)

for (i in 1:nrow(polarization_tab)) {
  temp.issues <- issuetopics$question[issuetopics$topic_6 == polarization_tab$topic_6[i]]
  temp.rich <- get(paste0(polarization_tab$party[i], '_top_boot'))
  temp.rich <- temp.rich[rownames(temp.rich) %in% temp.issues,]
  temp.poor <- get(paste0(polarization_tab$party[i], '_bottom_boot'))
  temp.poor <- temp.poor[rownames(temp.poor) %in% temp.issues,]
  temp.pol <- get(paste0(polarization_tab$party[i], '_polarization_boot'))
  temp.pol <- temp.pol[rownames(temp.pol) %in% temp.issues,]
  
  polarization_tab$avgpol[i] <- median(colMeans(abs(temp.pol)))
  
  temp.ci <- apply(temp.rich - temp.poor, MARGIN = 1, function(x) quantile(x, c(0.025, 0.975)))
  polarization_tab$signif[i] <- sum(!apply(temp.ci, MARGIN = 2, function(x) between(0, x[1], x[2])))
  
  polarization_tab$gilens[i] <- sum(abs(apply(temp.pol, MARGIN = 1, median)) > 0.1)
  
  temp.ci <- apply(temp.pol, MARGIN = 1, function(x) quantile(x, c(0.025, 0.975)))
  polarization_tab$gilenssignif[i] <- sum(apply(temp.ci, MARGIN = 2, function(x) all(x > 0.1) | all(x < -0.1)))
  
  temp.10pt <- apply(temp.ci, MARGIN = 2, function(x) all(x > 0.1) | all(x < -0.1))
  
  temp.disagree <- abs(apply((temp.rich > 0.5) - (temp.poor > 0.5), MARGIN =  1, median))
  
  polarization_tab$disagree[i] <- sum(temp.disagree)
  
  polarization_tab$disagreegilens[i] <- sum(temp.disagree*as.numeric(temp.10pt))
  
  polarization_tab$n_quest[i] <- length(temp.issues)
}

for (p in unique(polarization_tab$party)) {
  i <- nrow(polarization_tab) + 1
  polarization_tab[i,] <- NA
  polarization_tab$topic_6[i] <- 'Total'
  polarization_tab$party[i] <- p
  
  temp.pol <- get(paste0(polarization_tab$party[i], '_polarization_boot'))
  
  polarization_tab$avgpol[i] <- median(colMeans(abs(temp.pol)))
  
  for (v in c('signif', 'gilens', 'gilenssignif', 'disagree', 'disagreegilens', 'n_quest')) { 
    polarization_tab[i,v] <- sum(polarization_tab[polarization_tab$party == p, v], na.rm = TRUE)
  }
}

polarization_tab %>% 
  select(party, topic_6, n_quest, avgpol, signif, gilens, disagreegilens) %>% 
  arrange(party) %>%
  mutate(party = case_when(party == 'rep' ~ 'Republicans',
                           party == 'dem' ~ 'Democrats',
                           party == 'all' ~ 'All Respondents'),
         topic_6 = case_when(topic_6 == 'foreignpolicy' ~ 'Foreign policy',
                             topic_6 == 'cultural' ~ 'Cultural',
                             topic_6 == 'immigration' ~ 'Immigration',
                             topic_6 == 'lawenforcement' ~ 'Law enforcement',
                             topic_6 == 'economic' ~ 'Economic',
                             topic_6 == 'socialwelfare' ~ 'Social welfare',
                             TRUE ~ topic_6)) %>% 
  mutate_at(vars(signif:disagreegilens), funs(. / n_quest)) %>% 
  mutate_if(is.numeric, round, 3) %>% 
  mutate_at(vars(signif:disagreegilens), funs(paste0(. * 100, '\\%'))) %>% 
  mutate(party = cell_spec(party, angle = 90, format = 'latex')) %>% 
  kable('latex', booktabs = TRUE, escape = FALSE, linesep = '\\addlinespace',
        col.names = linebreak(c(' ', 'Policy\nDomain', 'Number of\nUnique Issues', 'Average\nOpinion Gap', 'Opinion Gap\nStatistically\nSignificant', 
                                'Opinion Gap\nGreater than\n10 pts.', '10 pt. Gap +\nDisagreement\non Policy'),
                              align = 'c'),
        align = c('l', 'l', rep('c', ncol(.)-2))) %>%
  collapse_rows(1, latex_hline = 'major') %>% 
  row_spec(c(7, 14, 21), bold = TRUE) %>% 
  save_kable(file = 'tables/detailed_polarization_summary.tex')



# figure 3 - meaningful gaps at various thresholds

thresholds_df <- expand.grid(question = issuetopics$question,
                             party = c('rep', 'dem', 'all'),
                             threshold = (1:50)/100,
                             polarized = NA,
                             stringsAsFactors = FALSE)

for (i in 1:nrow(thresholds_df)) {
  if (i %% floor(nrow(thresholds_df)/10) == 0) cat('|')
  temp.rich <- get(paste0(thresholds_df$party[i], '_top_boot'))
  temp.rich <- temp.rich[thresholds_df$question[i],]
  temp.poor <- get(paste0(thresholds_df$party[i], '_bottom_boot'))
  temp.poor <- temp.poor[thresholds_df$question[i],]
  
  t <- thresholds_df$threshold[i]
  
  if (median(abs(temp.rich - temp.poor)) >= t) { 
    thresholds_df$polarized[i] <- 1
  } else {
    thresholds_df$polarized[i] <- 0
  }
}

thresholdsplot.df <- thresholds_df %>% 
  mutate(party = case_when(party == 'dem' ~ 'Democrats',
                           party == 'rep' ~ 'Republicans',
                           party == 'all' ~ 'All Respondents')) %>% 
  group_by(party, threshold) %>% 
  summarize(n_pol = sum(polarized)) %>% 
  ungroup() %>% 
  filter(threshold <= 0.35)

ggplot(data = thresholdsplot.df,
       aes(x = threshold, y = n_pol / nrow(issuetopics))) + 
  geom_point(aes(color = party, shape = party), size = 2, alpha = 0.4) + 
  geom_line(aes(color = party), size = 0.75, alpha = 0.5) + 
  labs(title = '',
       y = 'Share of Questions with Opinion Gap',
       x = 'Threshold',
       color = '', shape = '') +
  scale_color_manual(values = c('gray40', 'blue', 'red')) +
  theme_bw() +
  theme(legend.position = 'bottom') +
  scale_y_continuous(labels = scales::percent)
ggsave('figures/num_polarized_by_threshold.png', height = 4, width = 5.5)



# table 3 - correlation of polarization levels

correlation_dist_rep_dem <- c()
correlation_dist_rep_all <- c()
correlation_dist_dem_all <- c()

diff_polarization_boot <- abs(rep_polarization_boot) - abs(dem_polarization_boot)

for (i in 1:ncol(diff_polarization_boot)) {
  correlation_dist_rep_dem[i] <- cor(rep_polarization_boot[,i], dem_polarization_boot[,i])
  correlation_dist_rep_all[i] <- cor(rep_polarization_boot[,i], all_polarization_boot[,i])
  correlation_dist_dem_all[i] <- cor(dem_polarization_boot[,i], all_polarization_boot[,i])
}

correlation_by_domain_rep_dem <- matrix(NA, nrow = length(unique(issuetopics$topic_6)), ncol = ncol(diff_polarization_boot),
                                        dimnames = list(unique(issuetopics$topic_6), c()))
correlation_by_domain_rep_all <- matrix(NA, nrow = length(unique(issuetopics$topic_6)), ncol = ncol(diff_polarization_boot),
                                        dimnames = list(unique(issuetopics$topic_6), c()))
correlation_by_domain_dem_all <- matrix(NA, nrow = length(unique(issuetopics$topic_6)), ncol = ncol(diff_polarization_boot),
                                        dimnames = list(unique(issuetopics$topic_6), c()))

for (t in rownames(correlation_by_domain_rep_dem)) {
  temp.issues <- issuetopics$question[issuetopics$topic_6 == t]
  for (i in 1:ncol(correlation_by_domain_rep_dem)) {
    correlation_by_domain_rep_dem[t,i] <- cor(rep_polarization_boot[temp.issues, i], dem_polarization_boot[temp.issues, i])
    correlation_by_domain_rep_all[t,i] <- cor(rep_polarization_boot[temp.issues, i], all_polarization_boot[temp.issues, i])
    correlation_by_domain_dem_all[t,i] <- cor(dem_polarization_boot[temp.issues, i], all_polarization_boot[temp.issues, i])
  }
}

correlations_out <- data.frame(topic_6 = rownames(correlation_by_domain_rep_dem),
                               cor_rep_dem = NA,
                               cor_rep_all = NA,
                               cor_dem_all = NA,
                               stringsAsFactors = FALSE)

correlations_out$cor_rep_dem <- apply(correlation_by_domain_rep_dem, MARGIN = 1, median)[match(correlations_out$topic_6, 
                                                                                               rownames(correlation_by_domain_rep_dem))]
correlations_out$cor_rep_all <- apply(correlation_by_domain_rep_all, MARGIN = 1, median)[match(correlations_out$topic_6,
                                                                                               rownames(correlation_by_domain_rep_all))]
correlations_out$cor_dem_all <- apply(correlation_by_domain_dem_all, MARGIN = 1, median)[match(correlations_out$topic_6,
                                                                                               rownames(correlation_by_domain_dem_all))]

correlations_out[7,] <- c(topic_6 = 'Total', 
                          cor_rep_dem = median(correlation_by_domain_rep_dem),
                          cor_rep_all = median(correlation_by_domain_rep_all),
                          cor_dem_all = median(correlation_by_domain_dem_all))

correlations_out %>% 
  mutate_at(vars(starts_with('cor_')), funs(round(as.numeric(.), 3))) %>% 
  mutate(topic_6 = case_when(topic_6 == 'foreignpolicy' ~ 'Foreign policy',
                             topic_6 == 'cultural' ~ 'Cultural',
                             topic_6 == 'immigration' ~ 'Immigration',
                             topic_6 == 'lawenforcement' ~ 'Law enforcement',
                             topic_6 == 'economic' ~ 'Economic',
                             topic_6 == 'socialwelfare' ~ 'Social welfare',
                             TRUE ~ topic_6)) %>% 
  kable('latex', booktabs = TRUE, linesep = '',
        col.names = linebreak(c('Policy Domain',
                                'Correlation\n(Rep.-Dem.)',
                                'Correlation\n(Rep.-All)',
                                'Correlation\n(Dem.-All)'),
                              align = 'c'),
        escape = FALSE) %>% 
  save_kable('tables/boot_table_correlations.tex')
